# ============================== SETUP ===============================
# Start from an empty workspace: removes every object returned by ls() in the
# environment the script is run from (loaded packages and options are kept).
# NOTE(review): this also wipes the caller's objects when the file is source()d
# from an interactive session.
rm(list = ls())
# Large scipen penalty so numbers print in fixed rather than scientific notation.
options(scipen = 999)
# All relative paths used below ("../datasets/...", "../plots/...") resolve
# against this directory.
# NOTE(review): the path is machine-specific; adjust it before replicating.
setwd("~/Dropbox/Wayne-Ying/White_Nationalist_Recruitment/replication/codes")
library(data.table)
library(fixest)
library(texreg)
library(ggplot2)

# ========================= DATA WRANGLING ==========================
# Read the two platform-level input files.
tweets <- fread("../datasets/input/reposttweets.csv", stringsAsFactors = FALSE)
gabs   <- fread("../datasets/input/repostgabs.csv", stringsAsFactors = FALSE)

# Dictionary count columns, listed in the order they are summed into n_themes.
dict_cols <- c("race_dict", "gender_dict", "nationalism_dict",
               "partisan_dict", "religion_dict")

# Recode a dictionary count into a 0/1 indicator: 1 when the count is at
# least 2, 0 otherwise (NA stays NA).
flag_two_plus <- function(x) ifelse(x >= 2, 1, 0)

# Overwrite each count column with its indicator, in place, on both tables.
tweets[, (dict_cols) := lapply(.SD, flag_two_plus), .SDcols = dict_cols]
gabs[, (dict_cols) := lapply(.SD, flag_two_plus), .SDcols = dict_cols]

# n_themes = number of indicators equal to 1 in each row; tabulate as a check.
tweets[, n_themes := Reduce(`+`, .SD), .SDcols = dict_cols]
table(tweets$n_themes)
gabs[, n_themes := Reduce(`+`, .SD), .SDcols = dict_cols]
table(gabs$n_themes)

# Derive date / month / year for each platform and keep only rows whose date
# falls inside the study window (both bounds inclusive).
study_start <- as.Date("2016-08-15")
study_end   <- as.Date("2021-05-15")

# ---- Twitter ----
head(tweets$created_at)
tweets$created_at <- as.POSIXct(tweets$created_at, format = "%Y-%m-%dT%H:%M:%S", tz = "UTC")
tweets$date  <- as.Date(tweets$created_at)
# Count timestamps that failed to parse BEFORE filtering. Subsetting a
# data.table with an NA logical drops the row, so the same count taken after
# the window filter is always 0 and cannot reveal parsing problems.
sum(is.na(tweets$date))
tweets$month <- zoo::as.yearmon(tweets$date)
tweets$year  <- lubridate::year(tweets$date)
# Rows with a missing date are excluded explicitly rather than implicitly.
tweets <- tweets[!is.na(tweets$date) &
                   tweets$date >= study_start &
                   tweets$date <= study_end, ]

# ---- Gab ----
head(gabs$created_at)
gabs$date  <- as.Date(gabs$created_at)
# Same pre-filter check as for Twitter.
sum(is.na(gabs$date))
gabs$month <- zoo::as.yearmon(gabs$date)
gabs$year  <- lubridate::year(gabs$date)
gabs <- gabs[!is.na(gabs$date) &
               gabs$date >= study_start &
               gabs$date <= study_end, ]

# Give the Twitter metric columns the names used in the Gab table so the two
# tables can be stacked. A name that is not present is simply left alone.
colnames(tweets)
twitter_renames <- c(
  "public_metrics.followers_count" = "followers_count",
  "public_metrics.tweet_count"     = "statuses_count",
  "public_metrics.retweet_count"   = "reblogs_count"
)
for (old_name in names(twitter_renames)) {
  colnames(tweets)[colnames(tweets) == old_name] <- twitter_renames[[old_name]]
}
colnames(gabs)

# Tag every row with its source platform.
tweets[, platform := "twitter"]
gabs[, platform := "gab"]

# Columns kept from both platforms, in the order they appear in gabtweet.
shared_cols <- c(
  "created_at", "username", "seed", "RelevantLogit", "ads_dict", "ntoken",
  "gender_dict", "nationalism_dict", "partisan_dict", "race_dict",
  "religion_dict", "folleader_repost", "out_repost", "reblogs_count",
  "followers_count", "statuses_count", "n_themes", "date", "month", "year",
  "platform"
)

# Stack the Twitter rows on top of the Gab rows.
gabtweet <- rbind(
  tweets[, shared_cols, with = FALSE],
  gabs[, shared_cols, with = FALSE]
)

# Inspect the raw repost counts before transforming them.
summary(gabtweet$out_repost)
summary(gabtweet$folleader_repost)
summary(gabtweet$reblogs_count)

# Add log(count + 1) versions of the three repost counts and the token count
# rescaled to units of 10 tokens.
gabtweet[, `:=`(
  out_repost_log       = log(out_repost + 1),
  folleader_repost_log = log(folleader_repost + 1),
  repost_log           = log(reblogs_count + 1),
  ntoken10             = ntoken / 10
)]

# ============================== ALL FOLLOWERS ===============================
# Estimation sample: rows with seed == 1.
seed_posts <- gabtweet[gabtweet$seed == 1, ]

# Log total reposts on the five theme indicators and post length, with date
# and platform fixed effects.
moda <- feols(
  repost_log ~ race_dict + nationalism_dict + gender_dict + partisan_dict +
    religion_dict + ntoken10 | date + platform,
  data = seed_posts
)
# Report with standard errors clustered by username.
summary(moda, cluster = ~ username)

# ================================ FIGURE 3 ==================================
# Coefficient table behind Figure 3: one row per theme with the point estimate
# and username-clustered standard error from `moda`.
#
# The clustered summary is computed once instead of once per cell, and the
# estimates stay numeric throughout. Passing them through a character matrix
# truncates them to 15 significant digits, and on R < 4.0 as.data.frame()
# would turn them into factors, so as.numeric() would return level codes.

# Facet label -> regression term, in the facet order used by the figure.
theme_terms <- c(
  Race        = "race_dict",
  Nationalism = "nationalism_dict",
  Gender      = "gender_dict",
  Partisan    = "partisan_dict",
  Religion    = "religion_dict"
)

moda_clustered <- summary(moda, cluster = ~ username)

all <- data.frame(
  theme        = factor(names(theme_terms), levels = names(theme_terms)),
  network      = "All Followers",
  coefficients = unname(moda_clustered$coefficients[theme_terms]),
  se           = unname(moda_clustered$se[theme_terms]),
  stringsAsFactors = FALSE
)

# Figure 3: point estimate and 95% interval (estimate +/- 1.96 * se) for each
# theme, one facet per theme, read from the `all` table.
#
# The plot is built before the PDF device is opened, so an error while
# constructing it cannot leave a device open, and it is drawn with an explicit
# print(): ggplot objects are only rendered when printed, and source() does
# not auto-print top-level values, which would otherwise leave the PDF blank.
# NOTE(review): `size` on geom_segment is kept for compatibility with older
# ggplot2; ggplot2 >= 3.4.0 prefers `linewidth` for lines.
fig3 <- ggplot(all, aes(x = network, y = coefficients)) +
  geom_segment(aes(x = network, xend = network,
                   y = coefficients - 1.96 * se, yend = coefficients + 1.96 * se,
                   color = "coefficient with 95% confidence interval"),
               size = 1) +
  geom_point(aes(color = "coefficient with 95% confidence interval"), size = 3) +
  geom_hline(yintercept = 0, color = "red4") +
  facet_wrap(~ theme, nrow = 1) +
  coord_flip() +
  scale_color_manual(
    name = "Legend",
    values = c("coefficient with 95% confidence interval" = "black"),
    labels = c("coefficient with 95% confidence interval")
  ) +
  xlab("") +
  ylab("") +
  theme(
    panel.background  = element_blank(),
    panel.grid.major  = element_line(colour = "gray90", linetype = "solid"),
    axis.text.x       = element_text(size = 12),
    axis.text.y       = element_text(size = 16),
    strip.text        = element_text(size = 16),
    # NA = no fill, so the border rectangle does not cover the panel.
    panel.border      = element_rect(colour = "black", fill = NA),
    strip.background  = element_rect(colour = "black"),
    axis.title        = element_text(size = 16),
    legend.title      = element_blank(),
    legend.text       = element_text(size = 16),
    legend.position   = "bottom"
  ) +
  scale_y_continuous(labels = scales::number_format(accuracy = 0.1))

pdf("../plots/Figure3.pdf", width = 12, height = 3.5)
print(fig3)
dev.off()

# ===================== PERIPHERAL AND CORE FOLLOWERS ========================
# Estimation sample: rows with seed == 1.
seed_posts <- gabtweet[gabtweet$seed == 1, ]

# Outcome: log(out_repost + 1). Same regressors and fixed effects as the
# all-followers model.
modo <- feols(
  out_repost_log ~ race_dict + nationalism_dict + gender_dict + partisan_dict +
    religion_dict + ntoken10 | date + platform,
  data = seed_posts
)
summary(modo, cluster = ~ username)

# Outcome: log(folleader_repost + 1).
modi <- feols(
  folleader_repost_log ~ race_dict + nationalism_dict + gender_dict +
    partisan_dict + religion_dict + ntoken10 | date + platform,
  data = seed_posts
)
summary(modi, cluster = ~ username)

# ================================= TABLE A11 ================================
### Export all regression results as a LaTeX table.
### Column order: moda, modi, modo; all with username-clustered standard errors.
a11_all  <- summary(moda, cluster = ~ username)
a11_modi <- summary(modi, cluster = ~ username)
a11_modo <- summary(modo, cluster = ~ username)
texreg(list(a11_all, a11_modi, a11_modo), stars = c(0.05, 0.1))

# ================================= FIGURE 4 ================================
# Coefficient table behind Figure 4: one row per theme and follower group.
# "Core Followers" rows come from `modi`, "Peripheral Followers" rows from
# `modo`; standard errors are clustered by username.
#
# Each clustered summary is computed once instead of once per cell, and the
# estimates stay numeric throughout. Passing them through a character matrix
# truncates them to 15 significant digits, and on R < 4.0 as.data.frame()
# would turn them into factors, so as.numeric() would return level codes.

# Facet label -> regression term, in the facet order used by the figure.
theme_terms <- c(
  Race        = "race_dict",
  Nationalism = "nationalism_dict",
  Gender      = "gender_dict",
  Partisan    = "partisan_dict",
  Religion    = "religion_dict"
)

modi_clustered <- summary(modi, cluster = ~ username)
modo_clustered <- summary(modo, cluster = ~ username)

# Row order: the five Core rows first, then the five Peripheral rows.
breakdown <- data.frame(
  theme        = rep(names(theme_terms), times = 2),
  network      = rep(c("Core Followers", "Peripheral Followers"),
                     each = length(theme_terms)),
  coefficients = unname(c(modi_clustered$coefficients[theme_terms],
                          modo_clustered$coefficients[theme_terms])),
  se           = unname(c(modi_clustered$se[theme_terms],
                          modo_clustered$se[theme_terms])),
  stringsAsFactors = FALSE
)

breakdown$theme <- factor(breakdown$theme, levels = names(theme_terms))
# Level order fixes the position of the two groups on the network axis.
breakdown$network <- factor(breakdown$network,
                            levels = c("Peripheral Followers", "Core Followers"))

# Figure 4: point estimate and 95% interval (estimate +/- 1.96 * se) for each
# theme and follower group, one facet per theme, read from `breakdown`.
#
# The plot is built before the PDF device is opened, so an error while
# constructing it cannot leave a device open, and it is drawn with an explicit
# print(): ggplot objects are only rendered when printed, and source() does
# not auto-print top-level values, which would otherwise leave the PDF blank.
# NOTE(review): `size` on geom_segment is kept for compatibility with older
# ggplot2; ggplot2 >= 3.4.0 prefers `linewidth` for lines.
fig4 <- ggplot(breakdown, aes(x = network, y = coefficients)) +
  geom_segment(aes(x = network, xend = network,
                   y = coefficients - 1.96 * se, yend = coefficients + 1.96 * se,
                   color = "coefficient with 95% confidence interval"),
               size = 1) +
  geom_point(aes(color = "coefficient with 95% confidence interval"), size = 3) +
  geom_hline(yintercept = 0, color = "red4") +
  facet_wrap(~ theme, nrow = 1) +
  coord_flip() +
  scale_color_manual(
    name  = "Legend",
    values = c("coefficient with 95% confidence interval" = "black"),
    labels = c("coefficient with 95% confidence interval")
  ) +
  xlab("") +
  ylab("") +
  theme(
    panel.background  = element_blank(),
    panel.grid.major  = element_line(colour = "gray90", linetype = "solid"),
    axis.text.x       = element_text(size = 12),
    axis.text.y       = element_text(size = 16),
    strip.text        = element_text(size = 16),
    # NA = no fill, so the border rectangle does not cover the panel.
    panel.border      = element_rect(colour = "black", fill = NA),
    strip.background  = element_rect(colour = "black"),
    axis.title        = element_text(size = 16),
    legend.title      = element_blank(),
    legend.text       = element_text(size = 16),
    legend.position   = "bottom"
  ) +
  scale_y_continuous(labels = scales::number_format(accuracy = 0.1))

pdf("../plots/Figure4.pdf", width = 12, height = 4.5)
print(fig4)
dev.off()

# ============================ TABLE A12: USER FIXED EFFECTS ============================
# Re-estimate the three models with an additional username fixed effect.
# This overwrites the moda / modo / modi objects fitted earlier in the script.
seed_posts <- gabtweet[gabtweet$seed == 1, ]

moda <- feols(
  repost_log ~ race_dict + nationalism_dict + gender_dict + partisan_dict +
    religion_dict + ntoken10 | date + platform + username,
  data = seed_posts
)
modo <- feols(
  out_repost_log ~ race_dict + nationalism_dict + gender_dict + partisan_dict +
    religion_dict + ntoken10 | date + platform + username,
  data = seed_posts
)
modi <- feols(
  folleader_repost_log ~ race_dict + nationalism_dict + gender_dict +
    partisan_dict + religion_dict + ntoken10 | date + platform + username,
  data = seed_posts
)

# Column order: moda, modi, modo; all with username-clustered standard errors.
a12_all  <- summary(moda, cluster = ~ username)
a12_modi <- summary(modi, cluster = ~ username)
a12_modo <- summary(modo, cluster = ~ username)
texreg(list(a12_all, a12_modi, a12_modo), stars = c(0.05, 0.1))
